package com.wjd.parser;

/**
 * Web page analyzer.
 *
 * <p>Converts a {@link PageContent} into a {@link PageInfo}, delegating to a
 * {@link ChineseParser} to compute the TF data of the title and the content.
 *
 * @author weijiaduo
 * @since 2024/3/12
 */
public class PageAnalyzer {

    /** Parser used to compute the TF data for the title and the content. */
    private final ChineseParser parser;

    /**
     * Creates an analyzer backed by the given parser.
     *
     * @param parser parser whose {@code getTF} is applied to the title and content
     */
    public PageAnalyzer(ChineseParser parser) {
        this.parser = parser;
    }

    /**
     * Creates an analyzer backed by a newly constructed {@link ChineseParser}.
     */
    public PageAnalyzer() {
        this(new ChineseParser());
    }

    /**
     * Analyzes the content of a web page.
     *
     * <p>Copies the URL, title and content over unchanged, stores the out-degree
     * as the page value, and attaches the parser's TF results for the title and
     * the content.
     *
     * @param pageContent the web page content to analyze
     * @return the resulting page information
     */
    public PageInfo analyze(PageContent pageContent) {
        final String pageUrl = pageContent.getUrl();
        final String pageTitle = pageContent.getTitle();
        final String pageBody = pageContent.getContent();
        final int outgoingLinks = pageContent.getOutDegree();

        // Bundle the processed data so it can be handed over to the indexer.
        final PageInfo info = new PageInfo();
        info.setUrl(pageUrl);
        // NOTE(review): the page value is seeded with the out-degree here;
        // confirm against PageInfo that this is the intended initial value.
        info.setPageValue(outgoingLinks);
        info.setTitle(pageTitle);
        info.setContent(pageBody);
        // "TF" is presumably term frequency; verify against ChineseParser#getTF.
        info.setTitleTF(parser.getTF(pageTitle));
        info.setContentTF(parser.getTF(pageBody));
        return info;
    }

}
